library(dplyr)
library(ggmap)
library(rgdal)
library(sp)
fake_addresses <- data.frame(ID = c(123,234,653),
House_Num = c(3535, 2716, 1915),
Street = c("Market St.",
"South Street",
"S 9th St."),
City = "Philadelphia",
State = "PA",
ZIP = c("19104",
"19148",
"19147"))
fake_addresses_1 <- data.frame(ID = c(123,234,653),
Street = c("3535 Market St.",
"2716 South Street",
"1915 S 9th St."),
City = "Philadelphia",
State = "PA",
ZIP = c("19104",
"19148",
"19147"))
fake_addresses_2 <- data.frame(code = c(345,623,123),
address = c("3535 Market St., Philadelphia, PA, 19104",
"2716 South Street, Philadelphia, PA, 19148",
"1915 S 9th St., Philadelphia, PA, 19147"))
It’s important to always get lat/long because polygon boundaries change, or you might want different polygons, but lat/long is good forever / until the end of civilization as we know it
I did this the lazy way! You can use Google as the source for the geocode but that requires a (potentially paid at high levels) API key secured with a credit card number. So I’m using “dsk” (data science tookit) instead.
ZERO error checking here, e.g. if address is missing etc. If you have something ungeocodable, it’ll barf. Maybe I could put try/catch in here?
We put in column index numbers – see parameter declaration for deets.
Here we assume that the address will either be all together, e.g. “123 Main Street, Smithville, NY 10010”, so, e.g.,
geocodeToLatLong(fake_addresses_2, 2)
or completely fragmented, e.g. 123 | Main Street | Smithville | NY | 10010, so, e.g.
geocodeToLatLong(fake_addresses,NA, 2,3,4,5,6)
NOTE you can leave one or maybe two fields as NA (say if you have house number and street name together in one col) and it may/should still work… but YMMV: e.g.
geocodeToLatLong(fake_addresses_1,NA, NA,2,3,4,5)
Either way we give column indices.
Returns df back but with first 2 cols as lat/long
geocodeToLatLong <- function(df,
full_address = NA, # col number, or NA
house_num_idx = 1, # col number /NA (ignored if full_address given)
street_idx = 2, # col number /NA (ignored if full_address given)
city_idx = 3, # col number /NA (ignored if full_address given)
state_idx = 4, # col number /NA (ignored if full_address given)
zip_idx = 5) { # col number /NA (ignored if full_address given)
if (!is.na(full_address)) {
addresses <- as.character(df[,full_address])
}
else {
addresses <- vector(mode="character", length=nrow(df))
if (!is.na(house_num_idx)) {addresses <- paste(addresses, df[,house_num_idx], sep = "")}
if (!is.na(street_idx)) {addresses <- paste(addresses, df[,street_idx], sep = " ")}
if (!is.na(city_idx)) {addresses <- paste(addresses, df[,city_idx], sep = ", ")}
if (!is.na(state_idx)) {addresses <- paste(addresses, df[,state_idx], sep = ", ")}
if (!is.na(zip_idx)) {addresses <- paste(addresses, df[,zip_idx], sep = " ")}
}
geocoded <- data.frame(address = addresses, stringsAsFactors = FALSE) %>%
mutate_geocode(address, source = "dsk") # not using Google bc of limits / CC# / $$$
return(cbind(geocoded %>% select(lat,lon), df))
}
geocoded_0 <- geocodeToLatLong(fake_addresses,NA, 2,3,4,5,6)
geocoded_0
## lat lon ID House_Num Street City State ZIP
## 1 39.95615 -75.19291 123 3535 Market St. Philadelphia PA 19104
## 2 39.94525 -75.18541 234 2716 South Street Philadelphia PA 19148
## 3 39.92938 -75.15996 653 1915 S 9th St. Philadelphia PA 19147
geocoded_1 <- geocodeToLatLong(fake_addresses_1,NA,NA,2,3,4,5)
geocoded_1
## lat lon ID Street City State ZIP
## 1 39.95615 -75.19291 123 3535 Market St. Philadelphia PA 19104
## 2 39.94525 -75.18541 234 2716 South Street Philadelphia PA 19148
## 3 39.92938 -75.15996 653 1915 S 9th St. Philadelphia PA 19147
geocoded_2 <- geocodeToLatLong(fake_addresses_2, 2)
geocoded_2
## lat lon code address
## 1 39.95615 -75.19291 345 3535 Market St., Philadelphia, PA, 19104
## 2 39.94525 -75.18541 623 2716 South Street, Philadelphia, PA, 19148
## 3 39.92938 -75.15996 123 1915 S 9th St., Philadelphia, PA, 19147
OK, now you gotta get a map of the area you care about… this can be trixy. I downloaded shapefiles from the Census Bureau bc it’s a point and click interface and I couldn’t be bothered to learn the API for this.
NOTE – I’m assuming you’re running this where the folder containing the shapefiles is at the same level as your working directory.
nj <- readOGR(dsn="tl_2018_34_tract")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/paytonk/Desktop/PAWS/tl_2018_34_tract", layer: "tl_2018_34_tract"
## with 2010 features
## It has 12 fields
## Integer64 fields read as strings: ALAND AWATER
(We can expand if needed):
Codes from https://www2.census.gov/geo/docs/reference/codes/files/st34_nj_cou.txt
nj_counties <- nj[which(nj$COUNTYFP %in% c("005","007","015")),]
Codes from https://www2.census.gov/geo/docs/reference/codes/files/st42_pa_cou.txt
pa <- readOGR(dsn = "tl_2018_42_tract")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/paytonk/Desktop/PAWS/tl_2018_42_tract", layer: "tl_2018_42_tract"
## with 3218 features
## It has 12 fields
## Integer64 fields read as strings: ALAND AWATER
pa_counties <- pa[which(pa$COUNTYFP %in% c("017","045","091", "101")),]
These counties are (presumably) the PAWS catchment for volunteers, adopters etc.
paws_catchment <- rbind(nj_counties, pa_counties)
head(paws_catchment@data)
## STATEFP COUNTYFP TRACTCE GEOID NAME NAMELSAD MTFCC
## 17 34 005 701001 34005701001 7010.01 Census Tract 7010.01 G5020
## 67 34 007 603901 34007603901 6039.01 Census Tract 6039.01 G5020
## 68 34 007 603902 34007603902 6039.02 Census Tract 6039.02 G5020
## 69 34 007 608202 34007608202 6082.02 Census Tract 6082.02 G5020
## 70 34 007 608205 34007608205 6082.05 Census Tract 6082.05 G5020
## 71 34 007 608206 34007608206 6082.06 Census Tract 6082.06 G5020
## FUNCSTAT ALAND AWATER INTPTLAT INTPTLON
## 17 S 5074160 7737 +40.0479570 -074.9123416
## 67 S 2197028 20755 +39.9031695 -075.0544482
## 68 S 1391656 31552 +39.9003800 -075.0595259
## 69 S 3392438 25352 +39.8238439 -075.0403268
## 70 S 2683492 30840 +39.8366833 -075.0401696
## 71 S 2783789 30232 +39.8453497 -075.0540964
Clean up – the shapefiles are pretty big.
rm(list = c("nj", "pa", "nj_counties", "pa_counties"))
makePolygons <- function(map, addresses, lat_col_name, long_col_name) {
coordinates <- SpatialPoints(addresses[c(long_col_name,lat_col_name)])
# IMPORTANT -- add a projection e.g. mercator, etc.
proj4string(coordinates) <- proj4string(map)
polygon_data <- over(coordinates, map)
return(cbind (polygon_data, addresses))
}
addresses_w_census <- makePolygons(paws_catchment, geocoded_1, "lat", "lon")
addresses_w_census
## STATEFP COUNTYFP TRACTCE GEOID NAME NAMELSAD MTFCC
## 1 42 101 009000 42101009000 90 Census Tract 90 G5020
## 2 42 101 001300 42101001300 13 Census Tract 13 G5020
## 3 42 101 002802 42101002802 28.02 Census Tract 28.02 G5020
## FUNCSTAT ALAND AWATER INTPTLAT INTPTLON lat lon ID
## 1 S 434900 0 +39.9595247 -075.1906334 39.95615 -75.19291 123
## 2 S 727890 60713 +39.9436029 -075.1858337 39.94525 -75.18541 234
## 3 S 362691 0 +39.9288001 -075.1614073 39.92938 -75.15996 653
## Street City State ZIP
## 1 3535 Market St. Philadelphia PA 19104
## 2 2716 South Street Philadelphia PA 19148
## 3 1915 S 9th St. Philadelphia PA 19147
This is hard to make big enough to be interesting, with all the counties I chose…
library(broom)
paws_fortified <- tidy(paws_catchment, region = "GEOID")
paws_map <- ggplot() +
geom_polygon(data=paws_fortified,
aes(x=long, y=lat, group=group, fill=NA),
color = "black", fill=NA, size=0.1) +
geom_point(data=addresses_w_census, aes(x=lon, y=lat, color="red", shape=".", alpha=0.5)) +
coord_map() +
theme_nothing()
paws_map
This is better for zooming, etc. We could make different markers for different stakeholders e.g. volunteers, adopters, surrenderers, etc.
library(leaflet)
interactive_map <- leaflet(paws_catchment) %>%
setView(lng = mean(as.numeric(addresses_w_census$lon), na.rm=TRUE),
lat = mean(as.numeric(addresses_w_census$lat), na.rm=TRUE), zoom = 11) %>%
addPolygons(
data = paws_catchment,
weight = 1, # border thickness
opacity = 0.5, # border opacity
color = "black" # border color
) %>%
addMarkers(lng = addresses_w_census$lon, lat=addresses_w_census$lat)
interactive_map